# coding:utf-8
import sqlite3
import sys

# Inclusive code point range of the Unicode "CJK Unified Ideographs
# Extension B" block (U+20000 - U+2A6DF).
CJK_EXT_B_FIRST = 0x20000
CJK_EXT_B_LAST = 0x2A6DF

# Index of the phrase text within a row returned by "SELECT * FROM phrases".
PHRASE_COLUMN = 8

# Text type of the running interpreter: ``unicode`` on Python 2, ``str`` on 3.
_TEXT_TYPE = type(u'')


def first_code_point(text):
    """Return the code point of the first character of *text*.

    On narrow Python 2 builds a character above U+FFFF is stored as a
    surrogate pair, so ``ord(text[0])`` alone would only yield the lead
    surrogate; the pair is recombined here.

    Returns None when *text* is not a non-empty text string (for example
    a NULL or empty column value).
    """
    if not isinstance(text, _TEXT_TYPE) or not text:
        return None
    lead = ord(text[0])
    if 0xD800 <= lead <= 0xDBFF and len(text) > 1:
        trail = ord(text[1])
        if 0xDC00 <= trail <= 0xDFFF:
            return 0x10000 + ((lead - 0xD800) << 10) + (trail - 0xDC00)
    return lead


def is_cjk_extension_b(text):
    """Return True if *text* starts with a CJK Extension B character."""
    code_point = first_code_point(text)
    if code_point is None:
        return False
    return CJK_EXT_B_FIRST <= code_point <= CJK_EXT_B_LAST


def find_cjk_extension_b_rows(conn):
    """Return the rows of ``phrases`` whose phrase starts in Extension B."""
    cursor = conn.execute("SELECT * FROM phrases")
    return [row for row in cursor if is_cjk_extension_b(row[PHRASE_COLUMN])]


def delete_phrases_by_id(conn, ids):
    """Delete the ``phrases`` rows with the given ids; the caller commits.

    Returns the number of rows deleted as reported by the cursor.
    """
    # Parameterized statement instead of splicing the ids into the SQL text.
    cursor = conn.executemany('DELETE FROM phrases WHERE id = ?',
                              [(row_id,) for row_id in ids])
    return cursor.rowcount


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('usage: python %s db_file_path' % __file__)
        sys.exit()
    conn = sqlite3.connect(sys.argv[1])
    try:
        # Remove phrases whose first character lies in Unicode CJK
        # Extension B (U+20000 - U+2A6DF).
        rows = find_cjk_extension_b_rows(conn)
        for row in rows:
            print('%s %d' % (row, first_code_point(row[PHRASE_COLUMN])))
        # Column 0 of each row is used as the id, as in the DELETE statement.
        delete_phrases_by_id(conn, [row[0] for row in rows])
        conn.commit()
    finally:
        conn.close()
    print('removed %d char(s) in cjk extension B' % len(rows))



